library(haven)
library(tidyverse)
library(rdrobust)
library(ggplot2)
library(estimatr)
library(formatR)
<- read_dta("LMB-Data.dta") LMB_Data
$d <- ifelse(LMB_Data$demvoteshare < 0.5, 0, 1)
LMB_Data$x_c <- LMB_Data$demvoteshare - 0.5
LMB_Data<- c("demvoteshare", "score", "age", "sex", "medianincome", "pcturban",
column_dem "pctblack", "d", "x_c")
<- LMB_Data %>%
sum_dem1 filter(LMB_Data$d == 1) %>%
select(all_of(column_dem))
<- LMB_Data %>%
sum_dem0 filter(LMB_Data$d == 0) %>%
select(all_of(column_dem))
summary(sum_dem1)
## demvoteshare score age sex
## Min. :0.5002 Min. :-27.92 Min. :26.00 Min. :1.000
## 1st Qu.:0.5916 1st Qu.: 37.11 1st Qu.:44.00 1st Qu.:1.000
## Median :0.6870 Median : 66.00 Median :51.00 Median :1.000
## Mean :0.7279 Mean : 58.39 Mean :51.85 Mean :1.036
## 3rd Qu.:0.8446 3rd Qu.: 80.46 3rd Qu.:59.00 3rd Qu.:1.000
## Max. :1.0000 Max. :128.85 Max. :88.00 Max. :2.000
## NA's :40 NA's :40
## medianincome pcturban pctblack d
## Min. : 1968 Min. :0.1770 Min. :0.0000 Min. :1
## 1st Qu.: 6193 1st Qu.:0.5354 1st Qu.:0.0260 1st Qu.:1
## Median : 9300 Median :0.8034 Median :0.0667 Median :1
## Mean :11199 Mean :0.7460 Mean :0.1342 Mean :1
## 3rd Qu.:16048 3rd Qu.:0.9960 3rd Qu.:0.1929 3rd Qu.:1
## Max. :33404 Max. :1.0000 Max. :0.9205 Max. :1
## NA's :2448 NA's :2448 NA's :2448
## x_c
## Min. :0.0002307
## 1st Qu.:0.0915734
## Median :0.1869732
## Mean :0.2278590
## 3rd Qu.:0.3445815
## Max. :0.5000000
##
summary(sum_dem0)
## demvoteshare score age sex
## Min. :0.0000 Min. :-27.92 Min. :27.00 Min. :1.000
## 1st Qu.:0.3204 1st Qu.: 4.49 1st Qu.:45.00 1st Qu.:1.000
## Median :0.3836 Median : 12.28 Median :52.00 Median :1.000
## Mean :0.3663 Mean : 17.58 Mean :51.97 Mean :1.035
## 3rd Qu.:0.4433 3rd Qu.: 25.16 3rd Qu.:59.00 3rd Qu.:1.000
## Max. :0.4999 Max. :115.87 Max. :86.00 Max. :2.000
## NA's :109 NA's :105
## medianincome pcturban pctblack d
## Min. : 2085 Min. :0.1710 Min. :0.0000 Min. :0
## 1st Qu.: 6459 1st Qu.:0.5030 1st Qu.:0.0098 1st Qu.:0
## Median : 9474 Median :0.6457 Median :0.0280 Median :0
## Mean :12003 Mean :0.6661 Mean :0.0553 Mean :0
## 3rd Qu.:17738 3rd Qu.:0.8663 3rd Qu.:0.0600 3rd Qu.:0
## Max. :33404 Max. :1.0000 Max. :0.5420 Max. :0
## NA's :1881 NA's :1881 NA's :1881
## x_c
## Min. :-0.500000
## 1st Qu.:-0.179633
## Median :-0.116419
## Mean :-0.133669
## 3rd Qu.:-0.056731
## Max. :-0.000125
##
<- LMB_Data %>%
dem1 filter(LMB_Data$d == 1 & LMB_Data$demvoteshare > 0.4 & LMB_Data$demvoteshare <
0.6) %>%
select(all_of(column_dem))
summary(dem1)
## demvoteshare score age sex
## Min. :0.5002 Min. :-18.40 Min. :26.00 Min. :1.000
## 1st Qu.:0.5228 1st Qu.: 52.27 1st Qu.:41.00 1st Qu.:1.000
## Median :0.5465 Median : 70.03 Median :47.00 Median :1.000
## Mean :0.5485 Mean : 64.40 Mean :48.65 Mean :1.032
## 3rd Qu.:0.5736 3rd Qu.: 80.18 3rd Qu.:55.00 3rd Qu.:1.000
## Max. :0.5998 Max. :128.85 Max. :87.00 Max. :2.000
## NA's :16 NA's :16
## medianincome pcturban pctblack d
## Min. : 2608 Min. :0.1930 Min. :0.0000 Min. :1
## 1st Qu.: 6343 1st Qu.:0.5214 1st Qu.:0.0170 1st Qu.:1
## Median : 8944 Median :0.6959 Median :0.0397 Median :1
## Mean :10691 Mean :0.7110 Mean :0.0751 Mean :1
## 3rd Qu.:12894 3rd Qu.:0.9432 3rd Qu.:0.1046 3rd Qu.:1
## Max. :30726 Max. :1.0000 Max. :0.8893 Max. :1
## NA's :744 NA's :744 NA's :744
## x_c
## Min. :0.0002307
## 1st Qu.:0.0227644
## Median :0.0464677
## Mean :0.0484703
## 3rd Qu.:0.0736306
## Max. :0.0997699
##
<- LMB_Data %>%
dem0 filter(LMB_Data$d == 0 & LMB_Data$demvoteshare > 0.4 & LMB_Data$demvoteshare <
0.6) %>%
select(all_of(column_dem))
summary(dem0)
## demvoteshare score age sex
## Min. :0.4000 Min. :-27.92 Min. :27.00 Min. :1.000
## 1st Qu.:0.4264 1st Qu.: 4.22 1st Qu.:43.00 1st Qu.:1.000
## Median :0.4498 Median : 11.86 Median :51.00 Median :1.000
## Mean :0.4503 Mean : 16.76 Mean :51.35 Mean :1.026
## 3rd Qu.:0.4745 3rd Qu.: 24.35 3rd Qu.:59.00 3rd Qu.:1.000
## Max. :0.4999 Max. :115.87 Max. :83.00 Max. :2.000
## NA's :74 NA's :70
## medianincome pcturban pctblack d
## Min. : 2085 Min. :0.1710 Min. :0.0000 Min. :0
## 1st Qu.: 5958 1st Qu.:0.4725 1st Qu.:0.0090 1st Qu.:0
## Median : 8466 Median :0.6196 Median :0.0317 Median :0
## Mean :10335 Mean :0.6470 Mean :0.0634 Mean :0
## 3rd Qu.:12684 3rd Qu.:0.8259 3rd Qu.:0.0849 3rd Qu.:0
## Max. :29850 Max. :1.0000 Max. :0.5420 Max. :0
## NA's :1125 NA's :1125 NA's :1125
## x_c
## Min. :-0.099996
## 1st Qu.:-0.073634
## Median :-0.050198
## Mean :-0.049742
## 3rd Qu.:-0.025502
## Max. :-0.000125
##
ggplot(data = LMB_Data, aes(x = demvoteshare, y = score)) + geom_point() + labs(title = "Democratic vote share ADA score",
x = "Democratic Vote Share", y = "ADA Score")
ggplot(data = LMB_Data, aes(x = demvoteshare, y = score)) + geom_jitter(width = 0.007,
height = 0.007) + labs(title = "Democratic vote share ADA score with some jitter",
x = "Democratic Vote Share", y = "ADA Score")
On slide 20, we wanted to see if there were any significant differences particularly the standard deviations, between the covariates on either side of the cutoff (0.4<demvoteshare<0.5 and 0.5<demvoteshare<0.6). This is to check covariate balance. After plotting and including some jitter in the scatterplot (since the data points were too close on one another), we can observe a jump, or discontinuity, in the Americans for Democratic Action (ADA) score between winning and losing an election.
<- LMB_Data %>%
subset_lmb select(all_of(column_dem)) %>%
filter(LMB_Data$demvoteshare > 0.4 & LMB_Data$demvoteshare < 0.6)
<- LMB_Data %>%
lmb_4 select(all_of(column_dem)) %>%
filter(LMB_Data$demvoteshare > 0.4 & LMB_Data$demvoteshare < 0.5)
<- LMB_Data %>%
lmb_6 select(all_of(column_dem)) %>%
filter(LMB_Data$demvoteshare > 0.5 & LMB_Data$demvoteshare < 0.6)
<- lm_robust(score ~ d + x_c + I(x_c * x_c) + I(d * x_c) + I(d * x_c * x_c),
lm1 data = subset_lmb)
summary(lm1)
##
## Call:
## lm_robust(formula = score ~ d + x_c + I(x_c * x_c) + I(d * x_c) +
## I(d * x_c * x_c), data = subset_lmb)
##
## Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
## (Intercept) 17.712 1.184 14.9559 2.004e-49 15.39 20.03 4626
## d 45.928 1.852 24.7987 1.425e-127 42.30 49.56 4626
## x_c 38.640 54.129 0.7138 4.754e-01 -67.48 144.76 4626
## I(x_c * x_c) 295.172 514.038 0.5742 5.658e-01 -712.59 1302.93 4626
## I(d * x_c) 6.507 88.661 0.0734 9.415e-01 -167.31 180.33 4626
## I(d * x_c * x_c) -744.025 867.517 -0.8576 3.911e-01 -2444.77 956.72 4626
##
## Multiple R-squared: 0.5549 , Adjusted R-squared: 0.5544
## F-statistic: 1132 on 5 and 4626 DF, p-value: < 2.2e-16
ggplot(data = LMB_Data, aes(x = demvoteshare, y = score)) + geom_jitter(width = 0.007,
height = 0.007) + labs(title = "Quadratic around window", x = "Democratic Vote Share",
y = "ADA Score") + geom_vline(xintercept = 0.4, color = "red") + geom_vline(xintercept = 0.5,
color = "red") + geom_vline(xintercept = 0.6, color = "red") + stat_smooth(data = lmb_4,
method = "lm", formula = y ~ poly(x, 2), se = F, color = "red") + stat_smooth(data = lmb_6,
method = "lm", formula = y ~ poly(x, 2), se = F, color = "red")
<- lm_robust(score ~ d + x_c + I(d * x_c), data = subset_lmb)
lm2 summary(lm2)
##
## Call:
## lm_robust(formula = score ~ d + x_c + I(d * x_c), data = subset_lmb)
##
## Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
## (Intercept) 17.227 0.7557 22.7955 4.230e-109 15.75 18.71 4628
## d 47.159 1.2178 38.7257 1.957e-284 44.77 49.55 4628
## x_c 9.422 13.0537 0.7218 4.705e-01 -16.17 35.01 4628
## I(d * x_c) -9.128 21.9320 -0.4162 6.773e-01 -52.12 33.87 4628
##
## Multiple R-squared: 0.5548 , Adjusted R-squared: 0.5545
## F-statistic: 1887 on 3 and 4628 DF, p-value: < 2.2e-16
<- lm_robust(score ~ d, data = subset_lmb)
lm3 summary(lm3)
##
## Call:
## lm_robust(formula = score ~ d, data = subset_lmb)
##
## Standard error type: HC2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF
## (Intercept) 16.76 0.3766 44.50 0 16.02 17.50 4630
## d 47.64 0.6349 75.03 0 46.40 48.89 4630
##
## Multiple R-squared: 0.5548 , Adjusted R-squared: 0.5547
## F-statistic: 5630 on 1 and 4630 DF, p-value: < 2.2e-16
<- LMB_Data %>%
lmb_41 select(all_of(column_dem)) %>%
filter(LMB_Data$demvoteshare > 0.4 & LMB_Data$demvoteshare < 0.5 & LMB_Data$democrat ==
0)
<- LMB_Data %>%
lmb_61 select(all_of(column_dem)) %>%
filter(LMB_Data$demvoteshare > 0.5 & LMB_Data$demvoteshare < 0.6 & LMB_Data$democrat ==
1)
ggplot(data = LMB_Data, aes(x = demvoteshare, y = score)) + geom_jitter(width = 0.007,
height = 0.007) + labs(title = "Not including running variable in model", x = "Democratic Vote Share",
y = "ADA Score") + geom_vline(xintercept = 0.4, color = "red") + geom_vline(xintercept = 0.5,
color = "red") + geom_vline(xintercept = 0.6, color = "red") + stat_smooth(data = lmb_41,
method = "lm", formula = y ~ x, se = F, color = "red") + stat_smooth(data = lmb_61,
method = "lm", formula = y ~ x, se = F, color = "red")
As I noted in discussion 1, we notice a discontinuity in ADA score between winners and losers of an election. If the regression discontinuity method is valid, then the average voting records of Democrats who are barely elected should credibly represent, on average, how Democrats would have voted in districts that were actually, barely won by Republicans (and the same would occur for Republicans in barely won Democrat districts). In other words, as we compare elections that are closer and closer, all predetermined characteristics of Republican and Democratic districts become more and more similar. This is to test the convergence and divergence theories.
In the previous slides, we test the bandwidth h=0.1 around c=0.5. That is, we set a baseline at the cutoff of winning in a first past the post election at 0.5, or 50%, of the vote share. Winners with the bandwidth of 0.1 less and greater than the cutoff are considered close enough to be considered random. This bandwidth, as we examine later, may not be the best in terms of trade-off between variance and bias, but for now, it should suffice as a preliminary examination of the data.
On slide 31, we want to see how well a quadratic fits into our parametric model around our restricted window. Next we examine a linear fit around our restricted window. And finally, we examine our parametric model without a running variable around our restricted window.
All this was to show that any parametric model makes an assumption on the functional form on the relationship between X and Y. The best model depends on whether we use all the observations or not. Choosing the optimal window is important since that would affect how our covariates are balanced.
$score <- LMB_Data$score
LMB_Data<- LMB_Data$demvoteshare
x_lmb
<- rdrobust(LMB_Data$score, LMB_Data$demvoteshare, c = 0.5, h = 0.1, p = 2) rd1
## [1] "Mass points detected in the running variable."
summary(rd1)
## Call: rdrobust
##
## Number of Obs. 13577
## BW type Manual
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 5480 8097
## Eff. Number of Obs. 2428 2204
## Order est. (p) 2 2
## Order bias (q) 3 3
## BW est. (h) 0.100 0.100
## BW bias (b) 0.100 0.100
## rho (h/b) 1.000 1.000
## Unique Obs. 2770 3351
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional 45.915 1.717 26.741 0.000 [42.550 , 49.280]
## Robust - - 19.826 0.000 [40.902 , 49.877]
## =============================================================================
<- rdrobust(LMB_Data$score, LMB_Data$demvoteshare, c = 0.5, p = 2, bwselect = "mserd") rd2
## [1] "Mass points detected in the running variable."
summary(rd2)
## Call: rdrobust
##
## Number of Obs. 13577
## BW type mserd
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 5480 8097
## Eff. Number of Obs. 3197 2965
## Order est. (p) 2 2
## Order bias (q) 3 3
## BW est. (h) 0.136 0.136
## BW bias (b) 0.185 0.185
## rho (h/b) 0.732 0.732
## Unique Obs. 2770 3351
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional 46.227 1.460 31.666 0.000 [43.366 , 49.088]
## Robust - - 27.531 0.000 [42.768 , 49.324]
## =============================================================================
<- rdrobust(LMB_Data$score, LMB_Data$demvoteshare, c = 0.5, bwselect = "mserd") rd3
## [1] "Mass points detected in the running variable."
summary(rd3)
## Call: rdrobust
##
## Number of Obs. 13577
## BW type mserd
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 5480 8097
## Eff. Number of Obs. 2112 1893
## Order est. (p) 1 1
## Order bias (q) 2 2
## BW est. (h) 0.086 0.086
## BW bias (b) 0.141 0.141
## rho (h/b) 0.609 0.609
## Unique Obs. 2770 3351
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional 46.491 1.241 37.477 0.000 [44.060 , 48.923]
## Robust - - 31.425 0.000 [43.293 , 49.052]
## =============================================================================
rdplot(LMB_Data$score, LMB_Data$demvoteshare, c = 0.5, h = 0.1, p = 2, x.label = "demvoteshare",
y.label = "score", title = "Regression function fit", col.dots = "gray")
## [1] "Mass points detected in the running variable."
rdplot(subset_lmb$score, subset_lmb$demvoteshare, c = 0.5, h = 0.1, p = 2, x.label = "demvoteshare",
y.label = "score", title = "Regression function fit", col.dots = "gray")
## [1] "Mass points detected in the running variable."
<- rdrobust(LMB_Data$score, LMB_Data$demvoteshare, c = 0.5, covs = LMB_Data$pcturban +
rd4 $pctblack, bwselect = "mserd") LMB_Data
## [1] "Mass points detected in the running variable."
summary(rd4)
## Call: rdrobust
##
## Number of Obs. 9248
## BW type mserd
## Kernel Triangular
## VCE method NN
##
## Number of Obs. 3599 5649
## Eff. Number of Obs. 1319 1488
## Order est. (p) 1 1
## Order bias (q) 2 2
## BW est. (h) 0.102 0.102
## BW bias (b) 0.157 0.157
## rho (h/b) 0.653 0.653
## Unique Obs. 1728 2394
##
## =============================================================================
## Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
## =============================================================================
## Conventional 46.755 1.537 30.420 0.000 [43.743 , 49.767]
## Robust - - 25.134 0.000 [43.184 , 50.488]
## =============================================================================
On slide 49, we run a local polynomial regression discontinuity with robust bias-corrected confidence intervals and inference procedures to obtain an estimate of the average treatment effect of winning the election, 45.915. We are forcing the same bandwidth (h=0.10) around the cutoff like the previous slides. On slide 50, we allow the ‘rdrobust’ package function to obtain the optimal bandwidth with the bias-variance trade-off in mind. The bandwidth estimate is 0.136, which is not far from the 0.1 we were using earlier. On slide 51, we use the default polynomial degree (p=1) for our model. This, as we notice, results in a change in the bandwidth from 0.136 to 0.086—a smaller bandwidth.
Next, we plot the results we obtained on slides 49-51. On slide 53, we plot the binned the sample and on slide 55, narrow the range to the cutoff around our bandwidth. On slide 56, we incorporate the covariates pcturban and pctblack to see if they would change our estimates of the treatment effect. I obtained slightly different results than the slides. My bandwidth is h=0.102 instead of 0.106 and my estimate and standard errors are slightly higher. However, the estimated treatment effect still matches up with the estimates obtained in previous slides and is within the 95% confidence interval. Including covariates did not change the estimate of treatment effects since they are balanced.
We improved the fit with each model and the coefficient of d is significant with each model. The flat model was the best fit. We also see the treatment effect whether we look at the raw data or if we bin the data.
This means we obtained the same results as the original Lee, Moretti, and Butler (2004) paper. By focusing on narrowly won elections, we generated a quasi-experimental estimate. We can conclude that voters merely elect policies and that the strength of the electoral win has no effect on the winner’s behavior. The treatment effect at the cutoff 0.5 is around 45-47.